Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the various classes of the restriction enzyme implementation:: 
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ---------------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ---------------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the 
  22                                      rebase record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produces one double strand cut. 
  25                      4->TwoCuts  : two double strand cuts. 
  26          ---------------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ---------------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NotPalindromic          allow some optimisations of the code. 
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ---------------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      correspond to enzymes that are not 
  39                                      characterised in rebase. 
  40          ---------------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised 
  44                                      in rebase. 
  45   
  46                                      Defined correspond to enzymes that display 
  47                                      a constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refer to the overhang, not the site. 
  62                     So the enzyme ApoI (RAATTY) is defined even if its 
  63                     restriction site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ---------------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes 
  73                                      according to their suppliers to reduce the 
  74                                      quantity of results. 
  75                                      Also will allow the implementation of 
  76                                      buffer compatibility tables. Not 
  77                                      implemented yet. 
  78   
  79                                      the list of suppliers is extracted from 
  80                                      emboss_s.NNN 
  81          ---------------------------------------------------------------------------- 
  82  """ 
  83   
  84  from __future__ import print_function 
  85  from Bio._py3k import zip 
  86  from Bio._py3k import filter 
  87  from Bio._py3k import range 
  88   
  89  import re 
  90  import itertools 
  91   
  92  from Bio.Seq import Seq, MutableSeq 
  93  from Bio.Alphabet import IUPAC 
  94   
  95  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  96  from Bio.Restriction.Restriction_Dictionary import typedict 
  97  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
  98  # TODO: Consider removing this wildcard import. 
  99  from Bio.Restriction.RanaConfig import * 
 100  from Bio.Restriction.PrintFormat import PrintFormat 
101 102 103 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 104 # namespace), but have deprecated that module. 105 106 107 -def _check_bases(seq_string):
108 """Check characters in a string (PRIVATE). 109 110 Remove digits and white space present in string. Allows any valid ambiguous 111 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 112 113 Other characters (e.g. symbols) trigger a TypeError. 114 115 Returns the string WITH A LEADING SPACE (!). This is for backwards 116 compatibility, and may in part be explained by the fact that 117 Bio.Restriction doesn't use zero based counting. 118 """ 119 # Remove white space and make upper case: 120 seq_string = "".join(seq_string.split()).upper() 121 # Remove digits 122 for c in "0123456789": 123 seq_string = seq_string.replace(c, "") 124 # Check only allowed IUPAC letters 125 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 126 raise TypeError("Invalid character found in %s" % repr(seq_string)) 127 return " " + seq_string


# Lookup table keyed by single-letter IUPAC nucleotide code; each value is a
# string of IUPAC codes.
# NOTE(review): the values look like "every code whose set of bases overlaps
# the key's" (e.g. 'A' lists A plus each ambiguity code that can stand for A)
# and are presumably used when comparing overhangs -- confirm against the
# callers, which are not visible in this part of the file.
matching = {'A': 'ARWMHVDN', 'C': 'CYSMHBVN', 'G': 'GRSKBVDN',
            'T': 'TYWKHBDN', 'R': 'ABDGHKMNSRWV', 'Y': 'CBDHKMNSTWVY',
            'W': 'ABDHKMNRTWVY', 'S': 'CBDGHKMNSRVY', 'M': 'ACBDHMNSRWVY',
            'K': 'BDGHKNSRTWVY', 'H': 'ACBDHKMNSRTWVY',
            'B': 'CBDGHKMNSRTWVY', 'V': 'ACBDGHKMNSRWVY',
            'D': 'ABDGHKMNSRTWVY', 'N': 'ACBDGHKMNSRTWVY'}

# Alias: exposes Bio.Seq.Seq under the name DNA in this module's namespace.
DNA = Seq


class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly:
    remove anything which is not IUPAC alphabet and then add a space
    in front of the sequence to get a biological index instead of a
    python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule, linear (default)
    or circular.  Restriction sites are searched over the edges of a
    circular sequence.

    Attributes set by the constructor:
     - data     : upper-case sequence string with one leading space.
     - lower    : True if the original sequence string was all lower case.
     - linear   : True for a linear molecule, False for a circular one.
     - klass    : class of the wrapped sequence (used to rebuild slices).
     - alphabet : alphabet of the wrapped sequence.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        If seq is a FormattedSeq, its state is copied and linear has no
        effect on the shape of the sequence.

        Raises TypeError for any other kind of object.
        """
        if isinstance(seq, FormattedSeq):
            # Copy construction: the shape of the source wins over `linear`.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = str(seq)
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        """Return a string showing the wrapped sequence and the shape."""
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def _state(self):
        """Return the tuple of values that define equality (PRIVATE).

        These are the same ingredients that feed __repr__: the sequence
        class, the case flag, the sequence text, the alphabet (compared
        through its repr, as before) and the shape.
        """
        return (self.klass, self.lower, self.data,
                repr(self.alphabet), self.linear)

    def __eq__(self, other):
        """FS == other -> bool.

        True only if other is a FormattedSeq with the same sequence, case,
        sequence class, alphabet and shape.

        The state is compared directly instead of comparing repr() strings,
        so the result no longer depends on how the wrapped sequence class
        chooses to render itself.
        NOTE(review): Bio.Seq is believed to abbreviate long sequences in
        its repr, which would have made distinct long sequences compare
        equal -- confirm against the installed Biopython.
        """
        if not isinstance(other, FormattedSeq):
            return False
        return self._state() == other._state()

    def __ne__(self, other):
        """FS != other -> bool.  Always the inverse of ``==``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        __eq__; without it two equal FormattedSeq would also be "!=".
        """
        return not self == other

    def circularise(self):
        """FS.circularise() -> circularise FS (in place)."""
        self.linear = False

    def linearise(self):
        """FS.linearise() -> linearise FS (in place)."""
        self.linear = True

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence.
        """
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        Return a list of (location, match.group) tuples, one per match of
        pattern in the sequence.  The bound ``group`` method is used by the
        callers that handle non palindromic sites.

        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Circular: append the first size-1 bases so that sites which
            # span the origin can be matched.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return the item or slice i as an instance of the wrapped class.

        The original lower case is restored when the input was lower case.
        """
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
242


class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes derive.

    Enzymes are classes, so every operator that can be applied to an enzyme
    (``+``, ``/``, ``//``, ``%``, ``>>``, ``len()``, the comparisons...) is
    implemented here.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Checks the enzyme name and replaces cls.compsite by its compiled
        regular expression.

        Raises ValueError if name contains a hyphen or if cls.compsite
        cannot be compiled.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # The initialiser of the parent type is deliberately not called: a
        # former call was removed (2011/11/26) because nobody knew what it
        # was supposed to accomplish and all unit tests pass without it.
        try:
            cls.compsite = re.compile(cls.compsite)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        If other is an enzyme, return a batch of the two enzymes.
        If other is already a RestrictionBatch, add the enzyme to it.
        Anything else raises TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other (Python 2 classic division)
        returns RE.search(other).
        """
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE (Python 2 classic division)
        returns RE.search(other).
        """
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other).
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other).
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        Return the name of the enzyme.
        """
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        Used with eval or exec this will give back the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        Length of the recognition site.
        """
        return cls.size

    def __hash__(cls):
        """Hash on identity, consistent with the __eq__ implementation."""
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool.

        True if RE and other are the same enzyme, i.e. the very same
        Python object.
        """
        return cls is other

    def __ne__(cls, other):
        """RE != other -> bool.

        isoschizomer strict, same recognition site, same restriction -> False
        all the others -> True

        WARNING - This is not the inverse of the __eq__ method.
        """
        if not isinstance(other, RestrictionType):
            return True
        same_cut = cls.charac == other.charac
        return False if same_cut else True

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others : -> False
        """
        if not isinstance(other, RestrictionType):
            return False
        if cls.site == other.site and cls.charac != other.charac:
            return True
        return False

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.

        Raises TypeError if b is not an enzyme.
        """
        if isinstance(other, RestrictionType):
            return cls._mod1(other)
        raise TypeError(
            'expected RestrictionType, got %s instead' % type(other))

    def __ge__(cls, other):
        """a >= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if b is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        own, their = len(cls), len(other)
        if own == their:
            # Same site length: fall back on the names.
            return cls.__name__ >= other.__name__
        return own > their

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if b is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        own, their = len(cls), len(other)
        if own == their:
            return cls.__name__ > other.__name__
        return own > their

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if b is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        own, their = len(cls), len(other)
        if own == their:
            return cls.__name__ <= other.__name__
        return own < their

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if b is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        own, their = len(cls), len(other)
        if own == their:
            return cls.__name__ < other.__name__
        return own < their
456


class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        Return a list of all the sites of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (or an already built FormattedSeq, in which case linear is ignored).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        # Separating search from _search allows a (very limited)
        # optimisation of the search when using a batch of restriction
        # enzymes: the DNA is formatted once by the class which implements
        # the batch instead of once per enzyme.
        # See RestrictionBatch.search() for example.
        #
        if isinstance(dna, FormattedSeq):
            cls.dna = dna
        else:
            cls.dna = FormattedSeq(dna, linear)
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers -> print all the suppliers of R."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomer(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction.
        """
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomer(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomer(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site.
        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied it is used instead of the default AllEnzymes.

        equischizomer <=> same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        # RE is filtered out while scanning instead of being looked up and
        # deleted afterwards: the lookup raised ValueError whenever the
        # supplied batch did not contain RE.
        return sorted(x for x in batch if x is not cls and not cls != x)

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        neoschizomer <=> same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = sorted(x for x in batch if cls >> x)
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes.
        """
        if not batch:
            batch = AllEnzymes
        # Same approach as in equischizomers(): no ValueError when RE is not
        # a member of the supplied batch.
        return sorted(x for x in batch
                      if x is not cls and ((cls >> x) or (not cls != x)))

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        Frequency of the site.
        """
        return cls.freq
585


class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Their catalyse() method raises NotImplementedError (see
    Unknown.catalyse).

    Unknown and NotDefined are also part of the base classes of these
    enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.  The cut position being
        unknown for these enzymes, the location is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The four cut positions are always None for these enzymes.
        """
        return (None, None, None, None, cls.site)
670


class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        _modify yields the real place where the enzyme will cut, i.e.
        location shifted by cls.fst5.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        cut = location + cls.fst5
        yield cut

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location shifted back by cls.fst3.
        """
        cut = location - cls.fst3
        yield cut

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two "second cut" entries are always None for these enzymes.
        """
        return (cls.fst5, cls.fst3, None, None, cls.site)
747


class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        _modify yields the real places where the enzyme will cut: first
        location shifted by cls.fst5, then location shifted by cls.scd5.

        example (single cut, for illustration)::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        first_cut = location + cls.fst5
        yield first_cut
        second_cut = location + cls.scd5
        yield second_cut

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location shifted back by cls.fst3, then
        by cls.scd3.
        """
        first_cut = location - cls.fst3
        yield first_cut
        second_cut = location - cls.scd3
        yield second_cut

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.
        """
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
824


class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always True for the
        enzymes of this class.
        """
        return True
839


class Meth_Undep(AbstractCut):
    """Implement information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always False for the
        enzymes of this class.
        """
        return False
854


class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic.

    Palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Implement the search method for palindromic enzymes: every match of
        cls.compsite in cls.dna is converted into its cut position(s) with
        _modify.  The list is stored in cls.results and, when not empty,
        passed through cls._drop() before being returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            # _modify may yield more than one position per site.
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # Read the attribute again: _drop() works on cls.results.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always True here.
        """
        return True
888


class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not palindromic.

    Palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Implement the search method for non palindromic enzymes.

        Each match is dispatched on the regular expression group named after
        the enzyme: when that group matched, the cut positions come from
        _modify, otherwise from _rev_modify (and are also recorded in
        cls.on_minus).  The merged list is stored in cls.results, sorted and
        passed through cls._drop() when not empty.
        NOTE(review): presumably the named group marks a site found on the
        forward strand -- confirm where compsite is built.
        """
        name = str(cls)
        forward = cls.results = []
        reverse = cls.on_minus = []
        to_cut, to_rev_cut = cls._modify, cls._rev_modify
        for start, group in cls.dna.finditer(cls.compsite, cls.size):
            if group(name):
                forward.extend(to_cut(start))
            else:
                reverse.extend(to_rev_cut(start))
        forward.extend(reverse)
        if forward:
            forward.sort()
            cls._drop()
        # Read the attribute again: _drop() works on cls.results.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always False here.
        """
        return False
930


class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        The way these enzymes cut is unknown, so no fragment can be
        computed: this method always raises NotImplementedError.

        The parameters are only there to keep the same signature as the
        catalyse() method of the other overhang classes.
        """
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown";
        always "unknown" here.
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        List of all the enzymes that share compatible end with RE: always
        empty, as the ends produced by RE are unknown.

        batch is accepted (and ignored) so that this method can be called
        the same way as Blunt.compatible_end(batch), instead of raising
        TypeError when a batch is passed.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always False: an unknown end is compatible with nothing.
        """
        return False
1022


class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        r = cls.search(dna, linear)
        d = cls.dna
        if not r:
            # No site: the whole sequence is the only fragment.
            return (d[1:],)
        # Fragments lying between two consecutive sites.
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always True here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
        """
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown";
        always "blunt" here.
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        Sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes.

        If batch is supplied (and not empty) the enzymes are taken from it
        instead of the default AllEnzymes.
        """
        if not batch:
            batch = AllEnzymes
        # The supplied batch used to be ignored here: AllEnzymes was always
        # scanned whatever the argument.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of restriction ending of RE and other:
        a blunt end is compatible with any other blunt end.
        """
        return issubclass(other, Blunt)
1151
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # cls.dna is read only after the search, exactly as the original
        # did (presumably search() is what stores it -- confirm).
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence is the only fragment.
            return (seq[1:],)
        # Fragments lying between two consecutive sites.
        between = [seq[left:right] for left, right in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first site, the inner fragments, last site -> END.
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 5' overhang enzymes x for which `x % cls` is true.

        batch is any iterable of enzymes; when it is omitted (or empty)
        AllEnzymes is searched.
        """
        if not batch:
            batch = AllEnzymes
        # Search the batch that was asked for (it used to be ignored in
        # favour of AllEnzymes).
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang enzyme can be compatible; the decision is
        then left to cls._mod2.
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1282
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # cls.dna is read only after the search, exactly as the original
        # did (presumably search() is what stores it -- confirm).
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence is the only fragment.
            return (seq[1:],)
        # Fragments lying between two consecutive sites.
        between = [seq[left:right] for left, right in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first site, the inner fragments, last site -> END.
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 3' overhang enzymes x for which `x % cls` is true.

        batch is any iterable of enzymes; when it is omitted (or empty)
        AllEnzymes is searched.
        """
        if not batch:
            batch = AllEnzymes
        # Search the batch that was asked for (it used to be ignored in
        # favour of AllEnzymes).
        return sorted(x for x in batch if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang enzyme can be compatible; the decision is
        then left to cls._mod2.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1417
class Defined(AbstractCut):
    """Methods specific to the enzymes whose overhang and cut never vary.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        Tidy cls.results once the site-to-cut distance has been added:
        on a linear sequence the positions lying outside the sequence are
        discarded, on a circular one they are shifted by the sequence
        length instead, since the site itself is in the sequence.
        """
        length = len(cls.dna)
        if cls.dna.is_linear():
            # Skip the leading positions below 1, then keep positions up to
            # the first one that is not below `length`.
            after_start = list(itertools.dropwhile(lambda pos: pos < 1,
                                                   cls.results))
            cls.results = list(itertools.takewhile(lambda pos: pos < length,
                                                   after_start))
            return
        hits = cls.results
        # Leading positions before the origin wrap forward ...
        for idx, pos in enumerate(hits):
            if pos >= 1:
                break
            hits[idx] += length
        # ... and trailing positions past the end wrap backward.
        for back, pos in enumerate(hits[::-1]):
            if pos <= length:
                break
            hits[-(back + 1)] -= length
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut
        inside the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut
        outside the site.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()
        """
        return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        f5 = cls.fst5
        f3 = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if f5 == f3 == 0:
                return 'N^' + cls.site + '_N'
            if f3 == 0:
                return site[:f5] + '^' + site[f5:] + '_N'
            return site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        if cls.is_blunt():
            return site[:f5] + '^_' + site[f5:]
        # Remaining case: neither 5' overhang nor blunt.
        if f5 == f3 == 0:
            return 'N_' + site + '^N'
        return site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            # Let the ambiguous enzyme run the comparison.
            return other._mod2(cls)
        return False
1563
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        drop the site that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.
        The work is done in place on cls.results.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = list(drop(lambda x: x < 1, cls.results))
            cls.results = list(take(lambda x: x < length, cls.results))
        else:
            # Leading positions before the origin are shifted forward.
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # Trailing positions past the end are shifted backward.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut
        inside the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut
        outside the site.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()
        """
        return False

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang of RE is turned into a regular expression which is
        matched against the overhang of other; overhangs of different
        lengths are never compatible.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        pattern = cls.ovhgseq
        # Walk the ORIGINAL overhang; `pattern` is rewritten as we go.
        for base in cls.ovhgseq:
            if base == 'N':
                pattern = '.'.join(pattern.split('N'))
            if base in 'RYWMSKHDBV':
                expand = '[' + matching[base] + ']'
                pattern = expand.join(pattern.split(base))
        return re.match(pattern, other.ovhgseq) is not None

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose cut lies inside the
        site (0 <= fst5 <= len(RE)).
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # `shown` (not `re`) so that the regular expression module used by
        # _mod2 is not shadowed.
        if cls.cut_twice():
            shown = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                shown = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                shown = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                shown = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                shown = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # Both cuts before the site.  The marker is concatenated
                # with the padding ('N^' * n * 'N' raised a TypeError).
                shown = ('N^' + abs(f5) * 'N' + '_'
                         + abs(length + f3) * 'N' + site)
            elif f5 > length:
                shown = (site + (f5 - length) * 'N' + '^'
                         + (length + f3 - f5) * 'N' + '_N')
            else:
                shown = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                shown = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                shown = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    shown = 'N_' + site + '^N'
                else:
                    shown = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                shown = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                shown = (site[:f3] + '_' + site[f3:]
                         + (f5 - length) * 'N' + '^N')
            elif 0 <= f5 <= length:
                shown = ('N_' + 'N' * (f3 + length) + site[:f5] + '^'
                         + site[f5:])
            elif f3 > 0:
                shown = (site + f3 * 'N' + '_'
                         + (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                shown = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                         + abs(f5) * 'N' + site)
            else:
                shown = ('N_' + abs(f3 + length) * 'N' + site
                         + (f5 - length) * 'N' + '^N')
        return shown
1744
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        Linear sequences are left untouched.  For circular sequences the
        positions of cls.results lying outside the sequence are shifted, in
        place, by the sequence length.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        # Leading positions before the origin are shifted forward.
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Trailing positions past the end are shifted backward.  The list
        # has to be walked from its END ([::-1], as in the sibling
        # classes) since the element updated is counted from the end; the
        # former [:-1] slice walked it from the start.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut
        inside the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut
        outside the site.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return True

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always raise a ValueError: the overhang is not defined.
        """
        #
        #   Normally we should not arrive here. But well better safe than
        #   sorry.
        #   the overhang is not defined we are compatible with nobody.
        #   could raise an Error may be rather than return quietly.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % cls.site
1856
class Commercially_available(AbstractCut):
    #
    #   Recent addition to Rebase make this naming convention uncertain.
    #   May be better to says enzymes which have a supplier.
    #
    """Methods specific to the enzymes that can be bought from a supplier.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def suppliers(cls):
        """RE.suppliers() -> print the suppliers of RE.

        One supplier name per line, each followed by a comma.
        """
        for code in cls.suppl:
            name = suppliers_dict[code][0]
            print(name + ',')
        return

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        Names of the suppliers of RE, in the iteration order of
        suppliers_dict.
        """
        names = []
        for code, record in suppliers_dict.items():
            if code in cls.suppl:
                names.append(record[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet: returns None.
        """
        return

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers, which is always the case here.
        """
        return True
1899
class Not_available(AbstractCut):
    """Methods specific to the enzymes that no supplier sells.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """RE.suppliers() -> print the suppliers of RE.

        There is none: nothing is printed and None is returned.
        """
        return None

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        Names of the suppliers of RE: always a new empty list.
        """
        nobody = []
        return nobody

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet: always raises a TypeError since the enzyme
        has no supplier.
        """
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers, which is never the case here.
        """
        return False
1936
###############################################################################
#                                                                             #
#                       Restriction Batch                                     #
#                                                                             #
###############################################################################


class RestrictionBatch(set):
    """A set of restriction enzymes that can be searched as a whole.

    Attributes set by __init__:
        mapping         -- dict enzyme -> result of the last search.
        already_mapped  -- (sequence as str, linear) of the last search,
                           or None; used as cache key by search().
        suppliers       -- list of the supplier codes used so far.
    """

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence]) -> new RestrictionBatch.

        first is an iterable of enzymes (or names of enzymes), suppliers an
        iterable of supplier codes whose enzymes are added to the batch.
        raise a ValueError if an element of first is not an enzyme, a
        KeyError if a code is not in suppliers_dict.
        """
        first = [self.format(x) for x in first]
        # The names stored in suppliers_dict are resolved with eval().
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # add_supplier() and current_suppliers() need this list; it used
        # not to be created at all.
        self.suppliers = list(suppliers)

    def __str__(self):
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        # Long batches are abbreviated: the first two and the last two.
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """b / dna -> search dna with the enzymes of b (Python 2 hook)."""
        return self.search(other)

    def __rdiv__(self, other):
        """dna / b -> search dna with the enzymes of b (Python 2 hook)."""
        return self.search(other)

    # Python 3 (and `from __future__ import division`) look these up.
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError too if enzyme is not in B and add is False.
        """
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True.
        """
        new = RestrictionBatch()
        # The members were checked when they entered this batch, so they
        # are copied without a new check.  (Filling `new._data`, as was
        # done before, left the new batch empty.)
        for enzyme in self:
            if func(enzyme):
                new.add_nocheck(enzyme)
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code.
        """
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch.
        """
        suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a
        RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B.
        """
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if y can be evaluated to a RestrictionType return eval(y)
        raise a Value Error in all other case.
        """
        # NOTE(review): str(y) goes through eval(); never feed this method
        # with text coming from an untrusted source.
        try:
            if isinstance(y, RestrictionType):
                return y
            # Evaluate once and return that very object; evaluating `y`
            # itself a second time failed when y was not a str.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        """
        # NOTE(review): same eval() caveat as in format().
        return (isinstance(y, RestrictionType) or
                isinstance(eval(str(y)), RestrictionType))

    def split(self, *classes, **flags):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes which are a subclass of every class given, or
        which are NOT a subclass of it when class.__name__=False is given.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        """
        def splittest(element):
            for klass in classes:
                wanted = flags.get(klass.__name__, True)
                if issubclass(element, klass) != bool(wanted):
                    return False
            return True

        new = RestrictionBatch()
        # See lambdasplit(): the members are copied without a new check.
        for enzyme in self:
            if splittest(enzyme):
                new.add_nocheck(enzyme)
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically.
        """
        return sorted(str(e) for e in self)

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B.
        """
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """B.suppl_codes() -> dict

        letter code for the suppliers
        """
        supply = dict((k, v[0]) for k, v in suppliers_dict.items())
        return supply

    @classmethod
    def show_codes(cls):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        return the dictionary enzyme -> result of enzyme.search() for every
        enzyme of B.  The result is cached: searching again the same
        sequence (compared as a string) with the same topology returns
        the stored mapping.

        dna is a Seq/MutableSeq (linear then gives the topology) or a
        FormattedSeq (which carries its own topology).
        raise a TypeError for any other type.
        """
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            if (str(dna), linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), linear
                fseq = FormattedSeq(dna, linear)
                self.mapping = dict((x, x.search(fseq)) for x in self)
                return self.mapping
        elif isinstance(dna, FormattedSeq):
            if (str(dna), dna.linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), dna.linear
                self.mapping = dict((x, x.search(dna)) for x in self)
                return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
2178
2179 ############################################################################### 2180 # # 2181 # Restriction Analysis # 2182 # # 2183 ############################################################################### 2184 2185 2186 -class Analysis(RestrictionBatch, PrintFormat):
2187
def __init__(self, restrictionbatch=None, sequence=DNA(''),
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    For most of the method of this class if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given a new analysis using the Restriction Batch
    which has been given when the Analysis class has been instantiated.

    restrictionbatch -- the enzymes to use; a new empty RestrictionBatch
                        when omitted (or None).
    sequence         -- the sequence to analyse; it is searched right away
                        unless it is empty.
    linear           -- topology handed over to search().
    """
    if restrictionbatch is None:
        # A fresh batch per instance: a default built in the signature
        # is created once and shared by every Analysis, which all keep it
        # as self.rb.
        restrictionbatch = RestrictionBatch()
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb = restrictionbatch
    self.sequence = sequence
    self.linear = linear
    if self.sequence:
        self.search(self.sequence, self.linear)
2202
def __repr__(self):
    """Return 'Analysis(rb,sequence,linear)' built from the attributes."""
    shown = (self.rb, self.sequence, self.linear)
    return 'Analysis(%r,%r,%s)' % shown
2206
def _sub_set(self, wanted):
    """A._sub_set(other_set) -> dict.

    Internal use only.

    Filter self.mapping through wanted: the new dictionary holds only
    the entries whose enzyme is in wanted.
    """
    kept = {}
    for enzyme, sites in self.mapping.items():
        if enzyme in wanted:
            kept[enzyme] = sites
    return kept
2216
def _boundaries(self, start, end):
    """A._boundaries(start, end) -> tuple.

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    return (start, end, test) where test is self._test_normal when
    start < end and self._test_reverse otherwise.
    A boundary lower than 1 is counted from the end of the sequence
    (len(self.sequence) is added to it); a start still lower than 1
    after that is set to 1.
    raise a TypeError if start or end is not an int.
    """
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    if start < 1:
        start += len(self.sequence)
    if end < 1:
        end += len(self.sequence)
    # NOTE(review): a `start, end == end, start` expression stood here.
    # It had no effect and is not turned into a swap: ordering the
    # boundaries would make the _test_reverse branch below unreachable
    # for start > end.
    if start < 1:
        # Was `start == 1`, a comparison whose result was thrown away.
        start = 1
    if start < end:
        return start, end, self._test_normal
    return start, end, self._test_reverse
2241
def _test_normal(self, start, end, site):
    """A._test_normal(start, end, site) -> bool.

    Internal use only
    True when site lies in the half-open interval [start, end).
    """
    if site < start:
        return False
    return site < end
2249
def _test_reverse(self, start, end, site):
    """A._test_reverse(start, end, site) -> bool.

    Internal use only
    Test if site is in between end and start (for circular sequences):
    True when site is in [start, len(self.sequence)] or in [1, end).
    """
    if start <= site <= len(self.sequence):
        return True
    return 1 <= site < end
2257
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty, self.mapping is printed instead.
    An empty line is printed first, then the work is handed over to
    PrintFormat.print_that, whose return value is passed back.
    """
    results = dct if dct else self.mapping
    print("")
    return PrintFormat.print_that(self, results, title, s1)
2267
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Recognised keywords:

        NameWidth, ConsoleWidth -- set the attribute, then recompute
                                   self.Cmodulo and self.PrefWidth.
        sequence                -- store the new sequence and search it.
        rb                      -- re-initialise the analysis with the new
                                   restriction batch.
        linear                  -- store the new flag and search again.
        Indent, Maxsize         -- set the attribute.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refers to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Raises AttributeError for Cmodulo and PrefWidth (they are derived
    values) and for any unknown keyword.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Strings are compared with ==: identity (`is`) on a literal only
        # works by accident of interning.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None, so its result must not be bound to
            # self (that used to break every keyword processed afterwards).
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth'
                % k)
        else:
            raise AttributeError(
                'Analysis has no attribute %s' % k)
2303
def full(self, linear=True):
    """A.full() -> dict.

    The complete restriction map, i.e. self.mapping as it stands.
    The linear argument is accepted but not used.
    """
    whole_map = self.mapping
    return whole_map
2310
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Keep only the enzymes whose is_blunt() method answers true.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if enzyme.is_blunt():
            selected[enzyme] = sites
    return selected
2319
def overhang5(self, dct=None):
    """A.overhang5([dct]) -> dict.

    Keep only the enzymes whose is_5overhang() method answers true.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if enzyme.is_5overhang():
            selected[enzyme] = sites
    return selected
2328
def overhang3(self, dct=None):
    """A.overhang3([dct]) -> dict.

    Keep only the enzymes whose is_3overhang() method answers true.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if enzyme.is_3overhang():
            selected[enzyme] = sites
    return selected
2337
def defined(self, dct=None):
    """A.defined([dct]) -> dict.

    Keep only the enzymes whose is_defined() method answers true.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if enzyme.is_defined():
            selected[enzyme] = sites
    return selected
2346
def with_sites(self, dct=None):
    """A.with_sites([dct]) -> dict.

    Keep the entries whose list of sites is not empty.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    cutters = {}
    for enzyme, sites in source.items():
        if sites:
            cutters[enzyme] = sites
    return cutters
2355
def without_site(self, dct=None):
    """A.without_site([dct]) -> dict.

    Keep the entries whose list of sites is empty.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    non_cutters = {}
    for enzyme, sites in source.items():
        if not sites:
            non_cutters[enzyme] = sites
    return non_cutters
2364
def with_N_sites(self, N, dct=None):
    """A.with_N_sites(N [, dct]) -> dict.

    Keep the entries that list exactly N sites.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if len(sites) == N:
            selected[enzyme] = sites
    return selected
2373
def with_number_list(self, list, dct=None):
    """A.with_number_list(list [, dct]) -> dict.

    Keep the entries whose number of sites is found in list.
    When dct is missing or empty, self.mapping is screened.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if len(sites) in list:
            selected[enzyme] = sites
    return selected
2378
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    Names that are not found in AllEnzymes are reported with a printed
    message and ignored. The list given by the caller is left untouched.

    Without dct (or with an empty one) a fresh search of self.sequence is
    run with a RestrictionBatch made of the known names; otherwise the
    entries of dct whose key is one of the known names are returned.
    """
    # Collect the known names in a new list.  The former loop deleted from
    # `names` while enumerating it, which skipped the element following each
    # deletion, and its message used an undefined variable.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            print("no data for the enzyme: %s" % enzyme)
    if not dct:
        # Search with the topology of this analysis, not the default one.
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict((n, dct[n]) for n in known if n in dct)
2391
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    The enzymes of this batch whose size attribute equals site_size are
    selected. Without dct (or with an empty one) a fresh search of
    self.sequence is run on them; otherwise the entries of dct whose key
    is one of the selected enzymes are returned.
    """
    sites = [name for name in self if name.size == site_size]
    if not dct:
        # Search with the topology of this analysis, not the default one.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Membership is tested against the selected enzymes; the former code
    # tested `k in site_size`, i.e. against the integer itself.
    return dict((k, v) for k, v in dct.items() if k in sites)
2401
def only_between(self, start, end, dct=None):
    """A.only_between(start, end[, dct]) -> dict.

    Enzymes that cut the sequence, and whose every site passes the
    boundary test returned by self._boundaries(start, end).
    When dct is missing or empty, self.mapping is screened.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    inside_only = {}
    for enzyme, sites in source.items():
        if sites and all(test(start, end, site) for site in sites):
            inside_only[enzyme] = sites
    return inside_only
2422
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes with at least one site passing the boundary test returned
    by self._boundaries(start, end). They may cut elsewhere as well.
    When dct is missing or empty, self.mapping is screened.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    cutting_inside = {}
    for enzyme, sites in source.items():
        if any(test(start, end, site) for site in sites):
            cutting_inside[enzyme] = sites
    return cutting_inside
2440
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes selected by self.between(start, end, dct), each listed with
    only those of its sites that fall in the region:

        start <= end  -- sites with start <= site <= end,
        start > end   -- sites with site >= start or site <= end.
    """
    # Iterate over (enzyme, sites) pairs.  The former comprehension walked
    # the keys only and referred to an undefined `k`.
    selected = self.between(start, end, dct).items()
    if start <= end:
        d = [(k, [vv for vv in v if start <= vv <= end])
             for k, v in selected]
    else:
        d = [(k, [vv for vv in v if start <= vv or vv <= end])
             for k, v in selected]
    return dict(d)
2455
def only_outside(self, start, end, dct=None):
    """A.only_outside(start, end [, dct]) -> dict.

    Enzymes that cut the sequence, and none of whose sites passes the
    boundary test returned by self._boundaries(start, end).
    When dct is missing or empty, self.mapping is screened.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    outside_only = {}
    for enzyme, sites in source.items():
        if sites and not any(test(start, end, site) for site in sites):
            outside_only[enzyme] = sites
    return outside_only
2477
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes with at least one site failing the boundary test returned
    by self._boundaries(start, end). Whether they also cut inside the
    region is not checked.
    When dct is missing or empty, self.mapping is screened.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    cutting_outside = {}
    for enzyme, sites in source.items():
        if not all(test(start, end, site) for site in sites):
            cutting_outside[enzyme] = sites
    return cutting_outside
2496
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end:
    the enzymes of dct without any site, together with those returned
    by self.only_outside(start, end, dct).
    When dct is missing or empty, self.mapping is screened.
    """
    if not dct:
        dct = self.mapping
    # Screen the same dictionary in both steps.  without_site() used to be
    # called with no argument, so enzymes absent from dct leaked into the
    # result.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# Restriction_Dictionary holds two dictionaries: one for the types (called
# pseudo-types below, as they really correspond to the values that instances
# of RestrictionType can take) and one for the enzymes. They are kept
# separate for efficiency: the bases are evaluated once per pseudo-type
# rather than once per enzyme.
#
# Restriction is nevertheless a very inefficient module at import time:
# around 660 classes (more or less the size of Rebase) have to be created
# dynamically. This processing takes place only once, and it is largely
# compensated by the use of a metaclass, which provides a very efficient
# layout for the classes themselves and mostly removes the need for if/else
# tests in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names are not used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # If you have a look at the dictionary, you will see that not all the
    # types are present: those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is turned into the object it names.
    # NOTE(review): eval() runs on the strings stored in typedict; this is
    # presumably trusted, generated data -- confirm before feeding it
    # anything else.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # Now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # We add the enzyme to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm'] + names
# Remove the loop helpers from the module namespace.
del k, enzymes, TYPE, bases, names